In [1]:
from google.colab import drive
drive.mount('/content/drive')
Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive
In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [3]:
data = pd.read_csv('covid_19_clean_complete.csv')
In [4]:
data.head()
Out[4]:
Province/State Country/Region Lat Long Date Confirmed Deaths Recovered WHO Region
0 NaN Afghanistan 33.0000 65.0000 1/22/20 0 0 0 emro
1 NaN Albania 41.1533 20.1683 1/22/20 0 0 0 euro
2 NaN Algeria 28.0339 1.6596 1/22/20 0 0 0 afro
3 NaN Andorra 42.5063 1.5218 1/22/20 0 0 0 euro
4 NaN Angola -11.2027 17.8739 1/22/20 0 0 0 afro
In [5]:
data.isnull().sum()
Out[5]:
Province/State    25530
Country/Region        0
Lat                   0
Long                  0
Date                  0
Confirmed             0
Deaths                0
Recovered             0
WHO Region          276
dtype: int64
In [6]:
df = data.dropna(axis='columns')
In [7]:
df.head()
Out[7]:
Country/Region Lat Long Date Confirmed Deaths Recovered
0 Afghanistan 33.0000 65.0000 1/22/20 0 0 0
1 Albania 41.1533 20.1683 1/22/20 0 0 0
2 Algeria 28.0339 1.6596 1/22/20 0 0 0
3 Andorra 42.5063 1.5218 1/22/20 0 0 0
4 Angola -11.2027 17.8739 1/22/20 0 0 0
In [8]:
df.shape
Out[8]:
(36570, 7)

Adding new columns called "Still Infected"

In [9]:
df['Still Infected'] = df['Confirmed'] - df['Deaths'] - df['Recovered']
C:\Users\ilham\anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
In [10]:
df.head()
Out[10]:
Country/Region Lat Long Date Confirmed Deaths Recovered Still Infected
0 Afghanistan 33.0000 65.0000 1/22/20 0 0 0 0
1 Albania 41.1533 20.1683 1/22/20 0 0 0 0
2 Algeria 28.0339 1.6596 1/22/20 0 0 0 0
3 Andorra 42.5063 1.5218 1/22/20 0 0 0 0
4 Angola -11.2027 17.8739 1/22/20 0 0 0 0

Pre-Processing

In [11]:
full_latest = df[df['Date'] == max(df['Date'])].reset_index()
full_latest_grouped = full_latest.groupby('Country/Region')['Confirmed', 'Deaths', 'Recovered', 'Still Infected'].sum().reset_index()
C:\Users\ilham\anaconda3\lib\site-packages\ipykernel_launcher.py:2: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
  
In [12]:
full_latest_grouped.head()
Out[12]:
Country/Region Confirmed Deaths Recovered Still Infected
0 Afghanistan 20342 357 1875 18110
1 Albania 1246 34 938 274
2 Algeria 10154 707 6717 2730
3 Andorra 852 51 744 57
4 Angola 91 4 24 63
In [13]:
temp = df.groupby('Country/Region')['Confirmed', 'Deaths', 'Recovered', 'Still Infected'].max()
temp.style.background_gradient(cmap='Pastel1_r')
C:\Users\ilham\anaconda3\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
  """Entry point for launching an IPython kernel.
Out[13]:
Confirmed Deaths Recovered Still Infected
Country/Region
Afghanistan 20342 357 1875 18110
Albania 1246 34 938 289
Algeria 10154 707 6717 3162
Andorra 852 51 744 504
Angola 91 4 24 64
Antigua and Barbuda 26 3 20 21
Argentina 22794 664 6909 15221
Armenia 13130 200 4014 8916
Australia 3112 48 2724 2868
Austria 16902 672 15793 9334
Azerbaijan 7553 88 4149 3316
Bahamas 103 11 62 55
Bahrain 14763 26 9468 5547
Bangladesh 65769 888 13903 50978
Barbados 92 7 81 56
Belarus 48630 269 23647 24817
Belgium 59226 9595 16291 33340
Belize 19 2 16 16
Benin 339 3 151 255
Bhutan 59 0 11 48
Bolivia 13643 465 2086 11092
Bosnia and Herzegovina 2606 159 1968 1012
Botswana 40 1 23 22
Brazil 691758 36455 283952 371351
Brunei 141 2 138 109
Bulgaria 2727 160 1548 1513
Burkina Faso 889 53 765 321
Burma 242 6 156 131
Burundi 83 1 45 37
Cabo Verde 554 5 240 309
Cambodia 126 0 123 88
Cameroon 7908 212 0 7696
Canada 52849 4978 0 47871
Central African Republic 1634 5 38 1591
Chad 837 69 672 400
Chile 134150 1637 108150 59099
China 68135 4512 64435 50633
Colombia 38149 1265 14414 22470
Comoros 141 2 67 103
Congo (Brazzaville) 683 22 210 451
Congo (Kinshasa) 4016 85 537 3394
Costa Rica 1318 10 704 604
Cote d'Ivoire 3739 36 1818 1885
Croatia 2247 104 2126 1258
Cuba 2191 83 1862 849
Cyprus 964 18 807 692
Czechia 9628 327 0 9301
Denmark 11948 589 10755 3799
Diamond Princess 712 13 651 691
Djibouti 4207 28 1877 2390
Dominica 18 0 16 14
Dominican Republic 19600 538 12007 7956
Ecuador 43120 3621 21020 30088
Egypt 34079 1237 8961 23881
El Salvador 3015 53 1305 1657
Equatorial Guinea 1306 12 200 1094
Eritrea 39 0 39 36
Estonia 1939 69 1681 1395
Eswatini 333 3 225 154
Ethiopia 2020 27 344 1649
Fiji 18 0 18 17
Finland 6981 323 5800 2865
France 188836 29100 68283 97979
Gabon 3101 21 833 2247
Gambia 26 1 21 12
Georgia 809 13 674 367
Germany 185750 8685 169224 72864
Ghana 9638 44 3636 5958
Greece 2997 180 1374 1860
Grenada 23 0 0 23
Guatemala 7055 252 1261 5542
Guinea 4117 23 2877 1686
Guinea-Bissau 1368 12 153 1278
Guyana 154 12 80 73
Haiti 3334 51 29 3259
Holy See 12 0 12 10
Honduras 6327 258 712 5357
Hungary 4008 546 2279 2054
Iceland 1807 10 1794 1096
India 257486 7207 123848 126431
Indonesia 31186 1851 10498 18837
Iran 171789 8281 134349 34887
Iraq 12366 346 5186 6834
Ireland 25201 1679 22698 14888
Israel 17863 298 15091 9808
Italy 234998 33899 165837 108257
Jamaica 598 10 405 424
Japan 17039 917 14990 11869
Jordan 808 9 607 249
Kazakhstan 12694 56 7376 5779
Kenya 2767 84 752 1931
Kosovo 1142 30 871 576
Kuwait 31848 264 20205 15831
Kyrgyzstan 2007 22 1425 620
Laos 19 0 0 19
Latvia 1088 25 781 646
Lebanon 1331 30 768 684
Lesotho 4 0 2 2
Liberia 359 30 194 135
Libya 256 5 52 199
Liechtenstein 82 1 55 76
Lithuania 1714 71 1328 1047
Luxembourg 4039 110 3899 2870
MS Zaandam 9 2 0 7
Madagascar 1052 9 233 810
Malawi 438 4 55 379
Malaysia 8322 117 6674 2596
Maldives 1903 8 827 1389
Mali 1533 90 873 588
Malta 629 9 596 352
Mauritania 1049 55 108 886
Mauritius 337 10 324 286
Mexico 117103 13699 83775 19629
Moldova 9700 341 5638 3730
Monaco 99 4 93 86
Mongolia 193 0 75 141
Montenegro 324 9 315 266
Morocco 8224 208 7364 3321
Mozambique 424 2 0 422
Namibia 29 0 16 13
Nepal 3448 13 467 2968
Netherlands 47574 6013 98 41561
New Zealand 1504 22 1482 929
Nicaragua 1118 46 370 702
Niger 973 65 867 511
Nigeria 12486 354 3959 8173
North Macedonia 3025 153 1646 1226
Norway 8547 238 8138 8042
Oman 16882 75 3451 13356
Pakistan 98943 2002 33465 63476
Panama 16425 393 10218 6953
Papua New Guinea 8 0 8 8
Paraguay 1135 11 575 588
Peru 196515 5465 86219 104831
Philippines 21895 1003 4530 16362
Poland 26561 1157 12855 12549
Portugal 34693 1479 20995 24065
Qatar 68790 54 44338 35634
Romania 20479 1333 14638 7716
Russia 467073 5851 226272 234950
Rwanda 439 2 290 147
Saint Kitts and Nevis 15 0 15 15
Saint Lucia 19 0 18 14
Saint Vincent and the Grenadines 27 0 16 12
San Marino 680 42 428 475
Sao Tome and Principe 513 12 68 433
Saudi Arabia 101914 712 72817 28728
Senegal 4328 49 2588 1839
Serbia 11823 249 11348 7786
Seychelles 11 0 11 11
Sierra Leone 969 48 608 440
Singapore 37910 25 24886 20799
Slovakia 1528 28 1389 1022
Slovenia 1485 109 1359 1109
Somalia 2334 83 441 1810
South Africa 48285 998 24364 22923
South Korea 11814 273 10563 7577
South Sudan 1317 14 0 1303
Spain 241550 28752 150376 101617
Sri Lanka 1835 11 941 947
Sudan 6081 359 2014 3708
Suriname 122 1 9 112
Sweden 44730 4659 0 40071
Switzerland 30965 1921 28700 14349
Syria 141 6 0 135
Taiwan* 443 7 430 314
Tajikistan 4529 48 0 4481
Tanzania 509 21 183 305
Thailand 3112 58 2972 1451
Timor-Leste 24 0 0 24
Togo 495 13 248 234
Trinidad and Tobago 117 8 108 100
Tunisia 1087 49 982 798
Turkey 170132 4692 137969 80808
US 1942363 110514 506367 1325482
Uganda 616 0 96 520
Ukraine 27599 796 12513 14290
United Arab Emirates 38808 276 21806 17173
United Kingdom 286194 40542 528 245652
Uruguay 845 23 730 319
Uzbekistan 4331 17 3354 1361
Venezuela 2377 22 487 1909
Vietnam 331 0 307 163
West Bank and Gaza 472 3 403 275
Western Sahara 9 1 6 6
Yemen 484 112 0 372
Zambia 1089 7 912 628
Zimbabwe 282 4 34 244
In [14]:
temp = df.groupby('Date')['Confirmed', 'Deaths', 'Recovered', 'Still Infected'].sum()
temp = temp.reset_index()
temp = temp.sort_values('Date', ascending=False)
temp.head(1).reset_index(drop=True).style.background_gradient(cmap="Pastel1")
C:\Users\ilham\anaconda3\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
  """Entry point for launching an IPython kernel.
Out[14]:
Date Confirmed Deaths Recovered Still Infected
0 6/7/20 7009058 402730 3068626 3537702

Country with no Recovered Cases

In [15]:
temp = full_latest_grouped[full_latest_grouped['Recovered']==0]
temp = temp[['Country/Region', 'Confirmed', 'Deaths', 'Recovered']]
temp = temp.sort_values('Confirmed', ascending=False)
temp = temp.reset_index(drop=True)
temp.style.background_gradient(cmap='Reds')
Out[15]:
Country/Region Confirmed Deaths Recovered
0 Canada 97178 7877 0
1 Sweden 44730 4659 0
2 Czechia 9628 327 0
3 Cameroon 7908 212 0
4 Tajikistan 4529 48 0
5 South Sudan 1317 14 0
6 Yemen 484 112 0
7 Mozambique 424 2 0
8 Syria 141 6 0
9 Timor-Leste 24 0 0
10 Grenada 23 0 0
11 Laos 19 0 0
12 MS Zaandam 9 2 0
In [16]:
from plotnine import *

import plotly.express as px
import folium
import seaborn as sns
In [17]:
c = '#393e46'
d = '#ff2e63'
r = '#30e3ca'
i = '#f8b400'

cdr = [c, d, r] #grey - red - blue
idr  = [i, d, r] #yellow - red - blue

fig = px.bar(full_latest_grouped[['Country/Region', 'Confirmed']].sort_values('Confirmed', ascending=False),
             y="Confirmed", x="Country/Region", color='Country/Region',
             log_y=True, template='ggplot2', title='Confirmed Cases')
fig.show()
In [18]:
fig = px.bar(full_latest_grouped[['Country/Region', 'Deaths']].sort_values('Deaths', ascending=False),
             y="Deaths", x="Country/Region", color='Country/Region',
             log_y=True, template='ggplot2', title='Deaths Cases')
fig.show()
In [19]:
temp = df.groupby(['Country/Region', 'Date'])['Confirmed', 'Deaths', 'Recovered'].sum()
temp = temp.reset_index()

fig = px.bar(temp, x="Date", y="Confirmed", color="Country/Region", orientation='v', height=600,
            title='Confirmed Cases', color_discrete_sequence = px.colors.cyclical.mygbm)
fig.show()
C:\Users\ilham\anaconda3\lib\site-packages\ipykernel_launcher.py:1: FutureWarning:

Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.

In [20]:
temp = df.groupby('Date').sum().reset_index()
temp.head()
Out[20]:
Date Lat Long Confirmed Deaths Recovered Still Infected
0 1/22/20 5613.201163 6063.516762 555 17 28 510
1 1/23/20 5613.201163 6063.516762 654 18 30 606
2 1/24/20 5613.201163 6063.516762 941 26 35 880
3 1/25/20 5613.201163 6063.516762 1434 42 38 1354
4 1/26/20 5613.201163 6063.516762 2118 56 51 2011
In [21]:
temp['No. of Deaths to 100 Confirmed Cases'] = round(temp['Deaths']/
                                                                                                  temp['Confirmed'], 3)*100
temp['No. of Recovered to 100 Confirmed Cases'] = round(temp['Recovered']/
                                                                                                  temp['Confirmed'], 3)*100
temp['No. of Recovered to 1 Death Cases'] = round(temp['Recovered']/
                                                                                                  temp['Deaths'], 3)*100
In [22]:
temp = temp.melt(id_vars='Date',
                value_vars=['No. of Deaths to 100 Confirmed Cases',
                                      'No. of Recovered to 100 Confirmed Cases',
                                      'No. of Recovered to 1 Death Cases' ],
                var_name = 'Ratio',
                value_name='Value')
fig = px.line(temp, x="Date", y="Value", color='Ratio',
             title='Recovery and Mortality Rate Over The Time', color_discrete_sequence=cdr)
fig.show()
In [23]:
m = folium.Map(location=[0, 0], tiles='cartodbpositron',
              min_zoom=1, max_zoom=4, zoom_start=1)
for i in range(0, len(full_latest)):
    folium.Circle(
            location=[full_latest.iloc[i]['Lat'], full_latest.iloc[i]['Long']],
            color='crimson',
            tooltip = '<li><bold>Country : '+str(full_latest.iloc[i]['Country/Region'])+
                            '<li><bold>Confirmed : '+str(full_latest.iloc[i]['Confirmed'])+
                            '<li><bold>Deaths : '+str(full_latest.iloc[i]['Deaths'])+
                            '<li><bold>Recovered : '+str(full_latest.iloc[i]['Recovered']),
            radius=int(full_latest.iloc[i]['Confirmed'])).add_to(m)
    
m
Out[23]:
In [24]:
fig = px.choropleth(full_latest_grouped[full_latest_grouped['Deaths']>0],
                                   locations="Country/Region", locationmode='country names',
                                   color="Deaths", hover_name="Country/Region",
                                   range_color=[1,50], color_continuous_scale="agsunset",
                                   title='Countries With Deaths Reported')
fig.update(layout_coloraxis_showscale=False)
fig.show()
In [25]:
fig = px.choropleth(full_latest_grouped, locations="Country/Region",
                                   locationmode='country names', color="Confirmed",
                                   hover_name="Country/Region", range_color=[1,2000],
                                   color_continuous_scale="aggrnyl",
                                   title='Countries with Confirmed Cases')
fig.update(layout_coloraxis_showscale=False)
fig.show()
In [ ]:
formated_gdf = df.groupby(['Date', 'Country/Region'])['Confirmed', 'Deaths', 'Recovered'].max()
formated_gdf = formated_gdf.reset_index()
formated_gdf = formated_gdf[formated_gdf['Country/Region'] != 'China']

formated_gdf ['Date'] = pd.to_datetime(formated_gdf['Date'])
formated_gdf['Date'] = formated_gdf['Date'].dt.strftime('%m%d%y')
formated_gdf['size'] = formated_gdf['Confirmed'].pow(0.5)

fig = px.scatter_geo(formated_gdf[formated_gdf['Country/Region'] != 'China'],
                                    locations='Country/Region', locationmode='country names')